** Session setup: start from a clean Stata session
capture log close              // close a log left open by an earlier run; no error if none is open
clear all                      // empty Stata's memory before loading the data
set seed 1234                  // fixed random-number seed
set more off, permanently      // do not pause output; the setting is also remembered for later sessions

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////// Table O.33: Attempted Items by Gender, Subject, and Exam Day ////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

** Load the Phase 2 long-format dataset
use "Work Data/Gender_Phase2_long.dta", clear

*** Subject identifiers
* sub is a numeric, value-labelled copy of the string variable subject
encode subject, generate(sub)
label variable sub "Subject"

** Subject dummies
* tabulate numbers the indicators d_sub1-d_sub8 following the sorted values of
* subject, so the renames below rely on subject holding exactly eight codes
* whose sorted order is Biology, Chemistry, Geography, History, Language,
* Mathematics, Physics, Portuguese.
tabulate subject, generate(d_sub)
rename d_sub1 Biology
rename d_sub2 Chemistry
rename d_sub3 Geography
rename d_sub4 History
rename d_sub5 Language
rename d_sub6 Mathematics
rename d_sub7 Physics
rename d_sub8 Portuguese

* Labels
* Full variable names are used (Mathematics, not the abbreviation Math) so the
* block also runs when variable abbreviation is switched off.
label variable Biology "Biology"
label variable Chemistry "Chemistry"
label variable Geography "Geography"
label variable History "History"
label variable Mathematics "Mathematics"
label variable Physics "Physics"
label variable Portuguese "Portuguese"
label variable Language "Foreign Language"

*** Outcome variable: number of Phase 2 items attempted in the subject exam

* attempted = 12 minus the twelve missing_p2_ variables of the row's own
* subject (presumably 0/1 flags for unanswered items - confirm in the data).
gen attempted = .
levelsof subject, local(subject_codes)
foreach s of local subject_codes {
    // Accumulate the twelve subtraction terms for this subject, item 1 to item 12
    local skipped
    forvalues k = 1/12 {
        local skipped `skipped' - missing_p2_`s'`k'
    }
    replace attempted = 12 `skipped' if subject == "`s'"
}
summarize attempted
tabulate year, summarize(attempted)

* Day of the admission exam on which each subject is sat (two subjects per day)
gen exam_day = .
replace exam_day = 1 if inlist(1, Portuguese, Biology)     // Day 1: Portuguese and Biology
replace exam_day = 2 if inlist(1, Chemistry, History)      // Day 2: Chemistry and History
replace exam_day = 3 if inlist(1, Physics, Geography)      // Day 3: Physics and Geography
replace exam_day = 4 if inlist(1, Mathematics, Language)   // Day 4: Mathematics and Foreign Language
tabulate exam_day
label variable exam_day "Days of admission exam"

* Number of priority subjects per applicant (inscri2) on each admission-exam day
levelsof exam_day, local(exam_days)
foreach d of local exam_days {
    // Step 1: total of priority over the applicant's rows sat on day d.
    // The result is missing on the applicant's rows from other days.
    // total() is the documented name of the legacy egen function sum().
    bysort inscri2: egen n_prior_day`d' = total(priority) if exam_day == `d'
    // Step 2: copy that count onto every row of the applicant
    bysort inscri2: egen n_priority_day`d' = max(n_prior_day`d')
    drop n_prior_day`d'
    summarize n_priority_day`d'
    label variable n_priority_day`d' "\# priority subjects in exam day `d'"
}
* Careers with two priority subjects on Day 1.
* Author's note: only 1 career has 2 priority subjects in the same day of the
* admission exam (Phonology).
tabulate career_choice if n_priority_day1 == 2

** Priority indicators, defined relative to the exam day of the row's subject
*
*   only_priority  : this subject is priority and it is the day's only priority subject
*   other_priority : this subject is not priority, and the day has one priority subject
*   both_priority  : this subject is priority and the day has two priority subjects
*
* Each rule below reads: variable name, required value of priority,
* required number of priority subjects on that day.
foreach rule in "only_priority 1 1" "other_priority 0 1" "both_priority 1 2" {
    local flag     : word 1 of `rule'
    local is_prio  : word 2 of `rule'
    local n_needed : word 3 of `rule'

    gen `flag' = 0
    levelsof exam_day, local(exam_days)
    foreach d of local exam_days {
        replace `flag' = 1 if priority == `is_prio' & exam_day == `d' & n_priority_day`d' == `n_needed'
    }
}

** P1 scores: powers 2, 3 and 4 of the normalized subject-specific P1 score
foreach p of numlist 2/4 {
    gen norm_p1score`p' = norm_p1score^`p'
    summarize norm_p1score`p'
}

*********************************************************************************
****************   Relative performances ****************************************
*********************************************************************************

** ENEM: score minus the mean score of the same year-by-gender cell
bysort year female: egen norm_enem_w_ave_g = mean(norm_enem_w)
gen norm_enem_w_g = norm_enem_w - norm_enem_w_ave_g
drop norm_enem_w_ave_g
bysort year female: summarize norm_enem_w_g

** Phase 1 scores: score minus the mean of the same year-by-subject-by-gender cell
tabulate year, summarize(norm_p1score)
bysort year subject female: egen gs_norm_p1score_ave = mean(norm_p1score)
gen gs_norm_p1score = norm_p1score - gs_norm_p1score_ave
drop gs_norm_p1score_ave
bysort year female subject: summarize gs_norm_p1score

*********************************************************************************
**************** Main sample ****************************************************
*********************************************************************************

* 1) Keep only the years before affirmative action took place
keep if aa_year != 1
* Year 2000 is excluded as well (the reason is not documented in this file)
keep if year != 2000
tabulate year

*********************************************************************************
****************   Regressions by exam day **************************************
*********************************************************************************

sort inscri2
order subject gs_norm_p1score
label variable gs_norm_p1score "Phase 1 scores"

* Complement of the three priority indicators
gen no_priority = 1 - (only_priority + other_priority + both_priority)
tabulate no_priority

// ---------------------------------------------------------------------------
// Regressions of the outcome on the priority indicators, by subject and sex,
// followed by equality tests and export of a three-panel LaTeX table
// (men, women, differences).
// Stored-estimate naming used throughout:
//   reg2<sex>_<subject>    subject-specific regression for one sex
//   reg2other_<subject>    pooled regression with sex interactions
// ---------------------------------------------------------------------------
estimates clear

// List of dependent variables; only attempted is used here
global outcome_list = "attempted"

local count_outcome = 1
foreach outcome in $outcome_list {

// outcome_name enters the output file name below; outcome_title is set but
// not referenced in the code visible here
if `count_outcome' ==1 local outcome_name = "attempted"
if `count_outcome' ==1 local outcome_title = "Attempted"

local count_enem=1

// Loop over ENEM controls; only the gender-demeaned ENEM score is used here
foreach j in norm_enem_w_g {
	
	// title_enem is the row label of the ENEM interaction in the differences panel
	if `count_enem' ==1 loc title_enem= "ENEM"

	levelsof subject, local(levels) 
	foreach i of local levels {
	di "`i'" 
	di "`j'"
	
	forvalues sex =1/2{ // 1= male, 2=female
	
		// Subject lang is skipped. For every other subject: OLS within one sex,
		// then the sample mean of no_priority is attached as scalar shareno.
		if "`i'"!="lang"{
			reg `outcome' only_priority other_priority both_priority  `j' gs_norm_p1score if subject=="`i'" & sex==`sex', noomitted
			sum  no_priority if subject=="`i'" & sex==`sex'
			estadd scalar shareno = r(mean)
			estimates store reg2`sex'_`i'
			}
		}
	
	* TEST ACROSS GENDER
	
	// Pooled regression with every regressor interacted with sex; the joint
	// test is on all five interactions with sex level 2, and its p-value is
	// attached to the stored pooled model.
	if "`i'"!="lang"{
		reg `outcome' i.sex##c.(only_priority other_priority both_priority  `j' gs_norm_p1score)     if subject=="`i'"
		test 2.sex#c.only_priority 2.sex#c.other_priority 2.sex#c.both_priority 2.sex#c.`j' 2.sex#c.gs_norm_p1score
		estadd scalar p_value_across_gender = r(p)
		estimates store reg2other_`i'
		}
	}

	* TEST ACROSS SUBJECTS (WITHIN GENDER)
	// Combines the six subject models port, biol, chem, hist, phy, geog of one
	// sex with suest (math is not included) and tests equality of their _mean
	// equations. The p-value is attached to the port model only.
	// NOTE(review): the range of forvalues count is the single value 2 -
	// confirm this form is accepted by the Stata version in use.
	forvalues sex =1/2{
		forvalues count =2{
		
			suest reg`count'`sex'_port  reg`count'`sex'_biol reg`count'`sex'_chem reg`count'`sex'_hist reg`count'`sex'_phy reg`count'`sex'_geog  
			test [reg`count'`sex'_port_mean  = reg`count'`sex'_biol_mean = reg`count'`sex'_chem_mean = reg`count'`sex'_hist_mean = reg`count'`sex'_phy_mean = reg`count'`sex'_geog_mean ]
			local aux = `r(p)'
			
			// Re-open the port model, add the p-value, and store it again
			est restore reg`count'`sex'_port  
			estadd scalar p_value_across_sub=`aux'
			estimates store reg`count'`sex'_port  
			
		}
	}
		
	
	* TEST - Across Subjects/Gender
	// Same equality test over the twelve models (six subjects for each sex);
	// the p-value is attached to the pooled port model reg2other_port.
	forvalues count =2{
		suest reg`count'1_port  reg`count'1_biol reg`count'1_chem reg`count'1_hist reg`count'1_phy reg`count'1_geog   reg`count'2_port  reg`count'2_biol reg`count'2_chem reg`count'2_hist reg`count'2_phy reg`count'2_geog  
		test [reg`count'1_port_mean  = reg`count'1_biol_mean = reg`count'1_chem_mean = reg`count'1_hist_mean = reg`count'1_phy_mean = reg`count'1_geog_mean =reg`count'2_port_mean  = reg`count'2_biol_mean = reg`count'2_chem_mean = reg`count'2_hist_mean = reg`count'2_phy_mean = reg`count'2_geog_mean]
		local aux = `r(p)'
		est restore reg`count'other_port  
		estadd scalar p_value_across_both = `aux'
		estimates store reg`count'other_port
		}

	// Export: three esttab calls write to the same .tex file. The first call
	// replaces the file (men), the next two append (women, then differences).
	forvalues count = 2 {
	
		// Suffix of the output file name
		if `count' ==2 loc title= "_p1"
		
			if `count'==2{	
		
			// Panel 1: men (sex 1), seven subject columns.
			// NOTE(review): mtitle lists eight titles and the mgroups pattern has
			// eight entries while seven models are passed - confirm the intended layout.
			esttab reg`count'1_port  reg`count'1_biol reg`count'1_chem  reg`count'1_hist  reg`count'1_phy reg`count'1_geog reg`count'1_math  using "Output/`outcome_name'_sex_`j'`title'.tex", se star(* 0.10 ** 0.05 *** 0.01) booktabs nogap  ///
			 b(%7.3f) se(%7.3f)  replace f label nonumber   keep(only_priority other_priority both_priority gs_norm_p1score  `j' ) ///
			refcat(only_priority " \\ \multicolumn{7}{l}{\textit{Men}} \\", nolabel)	mtitle("Port" "Bio" "Chem" "Hist" "Phys" "Geog" "Math" "EngFr") mgroups("Day 1" "Day 2" "Day 3" "Day 4",  pattern(1 0 1 0 1 0 1 0)  prefix(\multicolumn{@span}{c}{) suffix(})span erepeat(\cmidrule(lr){@span})) stats(r2_a N  shareno p_value_across_sub, fmt(%7.3f %9.0fc  %7.3f  %7.3f) ///
			 labels("$\bar{R}^2$" "Number of observations"  "Proportion with no priority test today"  "P-value (across subjects)")) unstack noomitted
			
			// Panel 2: women (sex 2), appended without model titles or numbers
			esttab  reg`count'2_port  reg`count'2_biol reg`count'2_chem  reg`count'2_hist  reg`count'2_phy reg`count'2_geog reg`count'2_math  using "Output/`outcome_name'_sex_`j'`title'.tex", se star(* 0.10 ** 0.05 *** 0.01) booktabs nogap ///
			stats(r2_a N  shareno p_value_across_sub, fmt( %7.3f %9.0fc  %7.3f  %7.3f) ///
			 labels("$\bar{R}^2$"  "Number of observations"  "Proportion with no priority test today"  "P-value (across subjects)"  )) ///
			 b(%7.3f) se(%7.3f)  append f label nomtitle nonum collabels(none) keep(only_priority other_priority both_priority gs_norm_p1score  `j')  /// 
			refcat(only_priority " \\ \multicolumn{7}{l}{\textit{Women}} \\", nolabel)	unstack noomitted
			
			// Panel 3: interactions with sex level 2 from the pooled models, plus
			// the two cross-gender p-values.
			// NOTE(review): fmt lists three formats for two statistics.
			*pvalue
			esttab  reg`count'other_port  reg`count'other_biol reg`count'other_chem  reg`count'other_hist  reg`count'other_phy reg`count'other_geog reg`count'other_math  using "Output/`outcome_name'_sex_`j'`title'.tex", se star(* 0.10 ** 0.05 *** 0.01) booktabs nogap ///
			stats(p_value_across_gender p_value_across_both, fmt( %7.3f %7.3f  %7.3f) ///
			 labels("P-value (across gender)"  "P-value (across gender and subjects)" )) ///
			 b(%7.3f) se(%7.3f)  append f label nomtitle nonum collabels(none) keep(2.sex#c.only_priority 2.sex#c.other_priority 2.sex#c.both_priority 2.sex#c.`j' 2.sex#c.gs_norm_p1score) coeflabels(2.sex#c.only_priority "Only priority" 2.sex#c.other_priority  "Only other priority" 2.sex#c.both_priority "Both priorities" 2.sex#c.`j' "`title_enem'" 2.sex#c.gs_norm_p1score "Phase 1 scores") /// 
			refcat(2.sex#c.only_priority " \\ \multicolumn{8}{l}{\textit{Differences}} \\", nolabel)	unstack noomitted
			
		}
		
			
	}
		
		local ++count_enem
}
local ++count_outcome
}
